package com.galeno.sparksql02

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

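/**
 * Spark SQL demo: loads per-person feature data (id, name, age, height, weight,
 * facevalue, score) from a CSV file using an explicit schema and prints it.
 *
 * @author galeno
 * @date 2021/9/5 22:49
 */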
object SparkSqlZiDingYiFunction {

  /**
   * Entry point. Builds a local SparkSession, reads `data/features.txt` as a
   * CSV with a header row and an explicit schema, and prints the resulting
   * DataFrame to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .master("local")
      .appName(this.getClass.getName)
      .getOrCreate()

    try {
      // Load feature data: id,name,age,height,weight,facevalue,score
      // An explicit schema is supplied so column types are fixed up front
      // instead of being read as strings or inferred.
      val schema = StructType(Seq(
        StructField("id", DataTypes.IntegerType),
        StructField("name", DataTypes.StringType),
        StructField("age", DataTypes.IntegerType),
        StructField("height", DataTypes.DoubleType),
        StructField("weight", DataTypes.DoubleType),
        StructField("facevalue", DataTypes.DoubleType),
        StructField("score", DataTypes.DoubleType)
      ))

      val df: DataFrame = spark.read
        .option("header", "true")
        .schema(schema)
        .csv("data/features.txt")
      df.show()

      //    // To make it easy to compute the cosine similarity between two people,
      //    // the table can be self-joined (Cartesian product)
      //    df.createTempView("df")
      //    val joined = spark.sql(
      //      """
      //        |
      //        |select
      //        |   a.id,
      //        |   a.name,
      //        |   b.id as bid,
      //        |   b.name as bname,
      //        |   array(a.age,a.height,a.weight,a.facevalue,a.score) as features_a,
      //        |   array(b.age,b.height,b.weight,b.facevalue,b.score) as features_b
      //        |from df a join df b on a.id != b.id
      //        |
      //        |""".stripMargin)
      //    joined.createTempView("joined")
      //    joined.show()
    } finally {
      // Always release the Spark context, even if reading or showing the data fails.
      spark.stop()
    }
  }
}
